library(rjson)
library(stringr)
library(ggplot2)
library(R.utils)
library(multiwayvcov)
library(lmtest)
library(stm)
library(stopwords)
library(data.table)
library(stargazer)
library(here)
library(sentimentr)
library(lubridate)


#########################
#########################
# TOPIC MODEL PREP
#########################
#########################

setwd(here("release_data"))

load("TopicModel.RData")

#get topics per doc to get topic proportions/day
dt <- make.dt(immigrFit)

#define crime and welfare topics

dt$crime <- dt$Topic1 + dt$Topic3
dt$welfare <- dt$Topic13

dt <- dt[,c("docnum","crime","welfare")]

meta <- out$meta

meta <- cbind(meta, dt)

meta$date <- as.Date(meta$date)

#make sure to get any dates with zero segs
unique_dates <- data.frame(Date=seq(from=min(unique(meta$date)), to=max(unique(meta$date)), by=1))
unique_dates$channel <- "fox"

unique_dates2 <- data.frame(Date=seq(from=min(unique(meta$date)), to=max(unique(meta$date)), by=1))
unique_dates2$channel <- "msnbc"

unique_dates3 <- data.frame(Date=seq(from=min(unique(meta$date)), to=max(unique(meta$date)), by=1))
unique_dates3$channel <- "cnn"

unique_dates <- rbind(unique_dates, unique_dates2, unique_dates3)


#new coverage measure
meta$crime_dur <- meta$crime
meta$welf_dur <- meta$welfare
meta$trump_dur <- meta$trump

unique_dates <- data.frame(date=seq(from=min(unique(meta$date)), to=max(unique(meta$date)), by=1))

crime_dur <- aggregate(meta$crime_dur, list(meta$date), sum)
colnames(crime_dur)<- c("date","crime_news")

welf_dur <- aggregate(meta$welf_dur, list(meta$date), sum)
colnames(welf_dur)<- c("date","welfare_news")

trump_dur <- aggregate(meta$trump_dur, list(meta$date), sum)
colnames(trump_dur)<- c("date","trump_news")

crime_dur <- merge(crime_dur, unique_dates, by="date", all=T)
welf_dur <- merge(welf_dur, unique_dates, by="date", all=T)
trump_dur <- merge(trump_dur, unique_dates,  by="date", all=T)

crime_dur$crime_news[is.na(crime_dur$crime_news)]<- 0
welf_dur$welfare_news[is.na(welf_dur$welfare_news)]<- 0
trump_dur$trump_news[is.na(trump_dur$trump_news)]<- 0

news <- merge(trump_dur,merge(crime_dur, welf_dur, by="date"),by="date")


#########################
#########################
# BING SEARCH
#########################
#########################

setwd(here("release_data"))

agg_crime <- read.csv("crime_proportions.csv")
agg_welfare <- read.csv("welfare_proportions.csv")
agg_report <- read.csv("report_proportions.csv")
agg_hsi <- read.csv("hsi_proportions.csv")
agg_weather <- read.csv("weather_proportions.csv")

colnames(agg_crime) <- c("date","crime_pct","admin")
colnames(agg_welfare) <- c("date","welfare_pct","admin")
colnames(agg_report) <- c("date","report_pct","admin")
colnames(agg_hsi) <- c("date","hsi_pct","admin")
colnames(agg_weather) <- c("date","weather_pct","admin")

agg_crime$date <- as.Date(agg_crime$date)
agg_welfare$date <- as.Date(agg_welfare$date)
agg_report$date <- as.Date(agg_report$date)
agg_hsi$date <- as.Date(agg_hsi$date)
agg_weather$date <- as.Date(agg_weather$date)

######### TABLE A2 - BING ######### 

mean(agg_crime$crime_pct[agg_crime$admin=="Trump"])/mean(agg_crime$crime_pct[agg_crime$admin=="Obama"])
mean(agg_welfare$welfare_pct[agg_welfare$admin=="Trump"])/mean(agg_welfare$welfare_pct[agg_welfare$admin=="Obama"])
mean(agg_report$report_pct[agg_report$admin=="Trump"])/mean(agg_report$report_pct[agg_report$admin=="Obama"])
mean(agg_hsi$hsi_pct[agg_hsi$admin=="Trump"])/mean(agg_hsi$hsi_pct[agg_hsi$admin=="Obama"])

##### FIGURE A3 #####
ggplot(agg_hsi, aes(x=date, y=hsi_pct, color=admin))+geom_point(alpha=0.1)+theme(legend.position = "bottom", legend.title=element_blank())+xlab(NULL)+ylab("% Bing Searches\n(logged)")+stat_smooth(se=F, method="lm", size=2)+scale_y_continuous(trans='log10')

##### FIGURE 4 - BING #####

ggplot(agg_report, aes(x=date, y=report_pct, color=admin))+geom_point(alpha=0.1)+theme(legend.position = "bottom", legend.title=element_blank())+xlab(NULL)+ylab("% Bing Searches\n(logged)")+stat_smooth(se=F, method="lm", size=2)+scale_y_continuous(trans='log10')
ggplot(agg_crime, aes(x=date, y=crime_pct, color=admin))+geom_point(alpha=0.1)+theme(legend.position = "bottom", legend.title=element_blank())+xlab(NULL)+ylab("% Bing Searches\n(logged)")+stat_smooth(se=F, method="lm", size=2)+scale_y_continuous(trans='log10')
ggplot(agg_welfare, aes(x=date, y=welfare_pct, color=admin))+geom_point(alpha=0.1)+theme(legend.position = "bottom", legend.title=element_blank())+xlab(NULL)+ylab("% Bing Searches\n(logged)")+stat_smooth(se=F, method="lm", size=2)+scale_y_continuous(trans='log10')

##### TABLE A3 #####

lm_crime<- lm(crime_pct~date + admin, data=agg_crime)
vcov <- cluster.vcov(lm_crime, agg_crime$date)
coeftest(lm_crime, vcov = vcov)

lm_welfare<- lm(welfare_pct~date + admin, data=agg_welfare)
vcov <- cluster.vcov(lm_welfare, agg_welfare$date)
coeftest(lm_welfare, vcov = vcov)

lm_report<- lm(report_pct~date + admin, data=agg_report)
vcov <- cluster.vcov(lm_report, agg_report$date)
coeftest(lm_report, vcov = vcov)

lm_hsi<- lm(hsi_pct~date + admin, data=agg_hsi)
vcov <- cluster.vcov(lm_hsi, agg_hsi$date)
coeftest(lm_hsi, vcov = vcov)

#keep the dates where we have all three channels
#eg msnbc coverage not available in 2014, so drop 2014

news <- news[news$date>=as.Date("2015-01-01"),]
full <- merge(agg_weather,merge(agg_crime, merge(agg_welfare, merge(agg_report, merge(agg_hsi, news, by="date", all.y=T), all.y=T),all.y=T),all.y=T),all.y=T)

full$dow <- weekdays(full$date)
full$mo <- factor(month(full$date))

tot_segs <- aggregate(tot$duration, list(tot$date), length)

colnames(tot_segs)<- c("date","tot_mins")

tot_segs$date <- as.Date(tot_segs$date)

full <- merge(full, tot_segs, by="date")

full$trump_admin <- ifelse(full$date>=as.Date("2017-01-20"),1,0)

#########################
#########################
# GOOGLE SEARCH
#########################
#########################

setwd(here("release_data"))

############ FIGURE 4 - GOOGLE ############
#monthly plots
gt_report <- read.csv("google_trends_report.csv")
gt_crime <- read.csv("google_trends_crime.csv")
gt_welfare <- read.csv("google_trends_welfare.csv")

#assign each month a date for plotting
gt_report$date <- as.Date(paste0(gt_report$year,"-",str_pad(gt_report$month, width=2, side="left"),"-01"))
gt_crime$date <- as.Date(paste0(gt_crime$year,"-",str_pad(gt_crime$month, width=2, side="left"),"-01"))
gt_welfare$date <- as.Date(paste0(gt_welfare$year,"-",str_pad(gt_welfare$month, width=2, side="left"),"-01"))

#color by pres admin
gt_report$admin <- "Obama"
gt_report$admin[gt_report$year<2009]<- "Bush"
gt_report$admin[gt_report$year>2016]<- "Trump"

gt_crime$admin <- "Obama"
gt_crime$admin[gt_crime$year<2009]<- "Bush"
gt_crime$admin[gt_crime$year>2016]<- "Trump"

gt_welfare$admin <- "Obama"
gt_welfare$admin[gt_welfare$year<2009]<- "Bush"
gt_welfare$admin[gt_welfare$year>2016]<- "Trump"

# plot report, crime, and welfare
ggplot(gt_report, aes(x=date, y=search, color=admin))+geom_point(alpha=0.3)+theme(legend.position = "bottom", legend.title=element_blank())+xlab(NULL)+ylab("Google Trends")+stat_smooth(method="lm",se=F)
ggplot(gt_crime, aes(x=date, y=search, color=admin))+geom_point(alpha=0.3)+theme(legend.position = "bottom", legend.title=element_blank())+xlab(NULL)+ylab("Google Trends")+stat_smooth(method="lm",se=F)
ggplot(gt_welfare, aes(x=date, y=search, color=admin))+geom_point(alpha=0.3)+theme(legend.position = "bottom", legend.title=element_blank())+xlab(NULL)+ylab("Google Trends")+stat_smooth(method="lm",se=F)

############ TABLE 4 - GOOGLE ##########
gt_crime$admin <- relevel(factor(gt_crime$admin), ref="Obama")
summary(lm(search~date+admin, data=gt_crime))

gt_welfare$admin <- relevel(factor(gt_welfare$admin), ref="Obama")
summary(lm(search~date+admin, data=gt_welfare))

gt_report$admin <- relevel(factor(gt_report$admin), ref="Obama")
summary(lm(search~date+admin, data=gt_report))

############ TABLE A2 - GOOGLE ##########
mean(gt_crime$search[gt_crime$admin=="Trump"])/mean(gt_crime$search[gt_crime$admin=="Obama"])
mean(gt_welfare$search[gt_welfare$admin=="Trump"])/mean(gt_welfare$search[gt_welfare$admin=="Obama"])
mean(gt_report$search[gt_report$admin=="Trump"])/mean(gt_report$search[gt_report$admin=="Obama"])

#daily for regressions

g_crime <- read.csv("gt_crime_daily.csv")
g_welfare <- read.csv("gt_welfare_daily.csv")
g_report <- read.csv("gt_report_daily.csv")
g_weather <- read.csv("gt_weather_daily.csv")

g_crime$X <- NULL
g_welfare$X <- NULL
g_report$X <- NULL
g_weather$X <- NULL

g_crime$search <- NULL
g_welfare$search <- NULL
g_report$search <- NULL
g_weather$search <- NULL

g_crime$year <- NULL
g_welfare$year <- NULL
g_report$year <- NULL
g_weather$year <- NULL

g_crime$admin <- NULL
g_welfare$admin <- NULL
g_report$admin <- NULL
g_weather$admin <- NULL

colnames(g_crime) <- c("date","g_crime")
colnames(g_welfare) <- c("date","g_welfare")
colnames(g_report) <- c("date","g_report")
colnames(g_weather) <- c("date","g_weather")

google <- merge(g_weather,merge(g_crime, merge(g_welfare, g_report, by="date"), by="date"), by="date")
google$date <- as.Date(google$date)
full2 <- merge(full, google, by="date")

######## TABLE 6 - GOOGLE ########

g_report_lm <- lm(g_report~ dow+mo+tot_mins +trump_news:trump_admin+trump_news + crime_news+welfare_news+trump_admin+date, data=full2)
vcov <- cluster.vcov(g_report_lm, full2$date)
coeftest(g_report_lm)

######## TABLE A7 - GOOGLE ########
g_crime_lm <- lm(g_crime~ dow+mo+tot_mins  + crime_news+welfare_news+trump_admin+date, data=full2)
vcov <- cluster.vcov(g_crime_lm, full2$date)
coeftest(g_crime_lm)

g_welfare_lm <- lm(g_welfare~ dow+mo+tot_mins  + crime_news+welfare_news+trump_admin+date, data=full2)
vcov <- cluster.vcov(g_welfare_lm, full2$date)
coeftest(g_welfare_lm)

######## TABLE A12 - GOOGLE #######
g_weather_lm <- lm(g_weather~ dow+mo+tot_mins  + crime_news+welfare_news+trump_admin+date, data=full2)
vcov <- cluster.vcov(g_weather_lm, full2$date)
coeftest(g_weather_lm)

#########################
#########################
# TRUMP ADMIN TEST
#########################
#########################

####### TABLE 5 - GOOGLE #########

g_report_lm <- lm(g_report~ dow+mo+trump_news + trump_admin:trump_news +tot_mins  + crime_news+welfare_news+trump_admin+date, data=full2)
cluster.vcov(g_report_lm, full2$date)
coeftest(g_report_lm)

####### TABLES NOT REPORTED - GOOGLE #########

g_crime_lm <- lm(g_crime ~ dow+mo+trump_news + trump_admin:trump_news + tot_mins + crime_news+welfare_news+trump_admin+date, data=full2)
vcov <- cluster.vcov(g_crime_lm, full2$date)
coeftest(g_crime_lm)

g_welfare_lm <- lm(g_welfare~ dow+mo+trump_news + trump_admin:trump_news +tot_mins  + crime_news+welfare_news+trump_admin+date, data=full2)
vcov <- cluster.vcov(g_welfare_lm, full2$date)
coeftest(g_welfare_lm)

#########################
#########################
# EMOTIONS TEST
#########################
#########################

setwd(here("release_data"))

e <- read.csv(file="emotions_immigr.csv")

e <- e[,c("doc_id","emotion","mean", "date")]

e <- e[e$emotion %in% c("anger","anticipation","disgust","fear","joy","sadness","surprise","trust"),]

e2 <- reshape(e, idvar=c("doc_id","date"), direction="wide", timevar="emotion")

e3 <- NULL
for(i in unique(e2$date)){
  row <- c(i,colSums(e2[e2$date==i,3:10]))
  e3 <- rbind(e3, row)
}


e3 <- as.data.frame(e3)
e3$date <- as.Date(e3$V1)
full2 <- merge(full, e3, by="date")


full2$mean.anger <- as.numeric(as.character(full2$mean.anger))
full2$mean.fear <- as.numeric(as.character(full2$mean.fear))
full2$mean.sadness <- as.numeric(as.character(full2$mean.sadness))
full2$mean.disgust <- as.numeric(as.character(full2$mean.disgust))
full2$mean.trust <- as.numeric(as.character(full2$mean.trust))
full2$mean.joy <- as.numeric(as.character(full2$mean.joy))
full2$mean.anticipation <- as.numeric(as.character(full2$mean.anticipation))
full2$mean.surprise <- as.numeric(as.character(full2$mean.surprise))

####### GOOGLE ######## 

full2 <- merge(full, google, by="date")
full2 <- merge(full2, e3, by="date")

full2$mean.anger <- as.numeric(as.character(full2$mean.anger))
full2$mean.fear <- as.numeric(as.character(full2$mean.fear))
full2$mean.sadness <- as.numeric(as.character(full2$mean.sadness))
full2$mean.disgust <- as.numeric(as.character(full2$mean.disgust))
full2$mean.trust <- as.numeric(as.character(full2$mean.trust))
full2$mean.joy <- as.numeric(as.character(full2$mean.joy))
full2$mean.anticipation <- as.numeric(as.character(full2$mean.anticipation))
full2$mean.surprise <- as.numeric(as.character(full2$mean.surprise))

###### TABLE 7 - GOOGLE #######

g_report <- glm(g_report~mean.anger+mean.fear+mean.sadness+mean.disgust+tot_mins+trump_admin+date+dow+mo, data=full2)
g_report
g_report2 <- glm(g_report~g_crime + g_welfare + mean.anger+mean.fear+mean.sadness+mean.disgust+dow+mo+tot_mins  +trump_admin+date, data=full2)
g_report2
###### TABLE A8 - GOOGLE #######

g_crime <- glm(g_crime~mean.anger+mean.fear+mean.sadness+mean.disgust+dow+mo+tot_mins  +trump_admin+date, data=full2)
g_welfare <- glm(g_welfare~mean.anger+mean.fear+mean.sadness+mean.disgust+dow+mo+tot_mins  +trump_admin+date, data=full2)
